* Session setup ****************************************************************
* Close a log left open by a previous run (no error if none is open),
* turn off output paging and start from an empty memory.
cap log close
set more off
clear all

* Establish working directory **************************************************
* The global macro workdirectory is not defined in this file; it is expected to
* be set by the caller before this do-file runs.
cd "$workdirectory"

*-------------------------------------------------------------------------------
* Creating shares of subdistricts located within a FUA

* Importing intersection of FUAs and districts - manually modified file to
* avoid formatting problems (empty spaces in lines).
import delimited "raw_datasets\Maps\intersection_adm3.csv", encoding(UTF-8) clear
* geolevel3 is the row sum of the three census-specific code columns
* (presumably only one of them is filled per row - TODO confirm).
* - "double" keeps large integer codes exact; egen's default float storage
*   rounds integers above 16,777,216.
* - "missing" makes the result missing (instead of 0) when all three columns
*   are missing.
egen double geolevel3 = rowtotal(ipum2014 ipum2013 ipum2007), missing
save "processed_datasets\dataset_maps_and_cities_adm3", replace

* Importing area of districts
import delimited "raw_datasets\Maps\area_subsubdist.csv", encoding(UTF-8) clear
egen double geolevel3 = rowtotal(ipum2014 ipum2013 ipum2007), missing
keep geolevel3 cntry_code area_subsubdist
* Drop observations without a geolevel3 code. This relies on the "missing"
* option above: plain rowtotal() returns 0, never missing, for such rows.
* NOTE(review): an earlier comment also announced dropping geolevel3 == 888888,
* but no such drop was ever performed here - confirm whether it is needed.
drop if missing(geolevel3)
tempfile area_subsubdist
save `area_subsubdist', replace

* Merging and obtaining shares
* Many intersection rows may point to one district; only matched rows are kept.
use "processed_datasets\dataset_maps_and_cities_adm3", clear
merge m:1 cntry_code geolevel3 using `area_subsubdist'
keep if _merge == 3
drop _merge

* Ratio of the intersection area (area_calcu) to the district area
gen share = area_calcu/area_subsubdist
sort cntry_code geolevel3
save "processed_datasets\dataset_maps_and_cities_adm3", replace

*-------------------------------------------------------------------------------
* Generating industry per city data

* Generating empty file to attach data to
clear all
tempfile data_indgen_cities_adm3
save `data_indgen_cities_adm3', empty replace

* Generating local with all .csv in the folder.
* Files are read through their folder-qualified path instead of changing the
* working directory, so a failed import cannot leave the session in the wrong
* folder. Forward slashes are used because a backslash placed right before a
* macro's opening quote would prevent the macro from being expanded.
local indgen_dir "raw_datasets/Maps/indgen_maps_adm3"
local files : dir "`indgen_dir'" files "*.csv"

* Stop early with a clear message if there is nothing to import
if `"`files'"' == "" {
  display as error "no .csv files found in `indgen_dir'"
  exit 601
}

* Importing, appending and saving
* Variable names are taken from row 10 and data from row 11 onwards.
* country_year is the file name without its .csv extension.
foreach fi of local files {
  import delimited "`indgen_dir'/`fi'", varnames(10) rowrange(11) encoding(UTF-8) clear
  gen country_year = subinstr("`fi'",".csv","",.)
  append using `data_indgen_cities_adm3'
  save `data_indgen_cities_adm3', replace
}

* Cleaning: keep only the "Weighted N" rows and discard the column-total row
keep if v1 == "Weighted N"
drop v1
rename v2 census
drop if census == "COL TOTAL"

* Extracting geolevel3: first run of digits found in the census label
gen geolevel3 = regexs(0) if(regexm(census, "[0-9]+"))
destring geolevel3, replace

* Generating country codes (0 for any file other than the three listed below)
gen cntry_code = 0
replace cntry_code = 710 if country_year == "southafrica_2007"
replace cntry_code = 686 if country_year == "senegal_2013"
replace cntry_code = 104 if country_year == "myanmar_2014"

sort cntry_code geolevel3
save `data_indgen_cities_adm3', replace

*-------------------------------------------------------------------------------
* Generating final dataset

* Joining both datasets: every intersection row is paired with every industry
* row that has the same country code and geolevel3
use "processed_datasets\dataset_maps_and_cities_adm3", clear
joinby cntry_code geolevel3 using `data_indgen_cities_adm3'

* Renaming industries
* The two lists are parallel: the i-th imported name becomes the i-th short
* name. construction, unknown and rowtotal keep their names.
local long_names niunotinuniverse agriculturefishingandforestry ///
    miningandextraction manufacturing electricitygaswaterandwastemanag ///
    construction wholesaleandretailtrade hotelsandrestaurants ///
    transportationstorageandcommunic financialservicesandinsurance ///
    publicadministrationanddefense businessservicesandrealestate ///
    education healthandsocialwork otherservices privatehouseholdservices ///
    unknown rowtotal
local short_names niu agri mining mfg utilities construction trade ///
    hospitality transport fin_insu govmt bussserv_rs educ health ///
    other_serv house_serv unknown rowtotal
* Categories not present in these censuses (no rename needed):
* otherindustrynec, servicesnotspecified, responsesuppressed

local n_names : word count `long_names'
forvalues i = 1/`n_names' {
    local from : word `i' of `long_names'
    local to   : word `i' of `short_names'
    rename `from' `to'
}

* Applying shares: each count is weighted by the share computed for the
* intersection row and stored with a leading underscore
foreach ind of local short_names {
    generate _`ind' = share * `ind'
}

* Fixing country names from both datasets
replace cntry_na_1 = "South Africa" if cntry_na_1 == "SouthAfrica"

* Drop intersections which associate districts from country i to a city located in country j
drop if cntry_name != cntry_na_1

* Collapsing by city and census
collapse (sum) _* (mean) fua_p_2015 (first) efua_name, by(efua_id country_year)

* Industry shares: denominator is the row total minus the niu category.
* niu and rowtotal themselves get no share variable.
local no_share niu rowtotal
local sectors : list short_names - no_share
foreach sec of local sectors {
    generate share_`sec' = _`sec' / (_rowtotal - _niu)
}

* Six zero-filled variables for industries that do not exist in these
* countries' censuses, so that they do not show up as missing when appending
foreach prefix in _ share_ {
    foreach absent in other_industry serv_notsp resp_supressed {
        generate `prefix'`absent' = 0
    }
}

save "processed_datasets\dataset_maps_and_cities_adm3", replace
